function [clean, stats] = rnaseq_decontaminate(reads, varargin)

contaminants = human_contaminants();

clean = reads;

stats = struct;
stats.contaminants = contaminants.Name;
contaminant_reads = zeros(length(contaminants.Name), length(reads.url));

for s = 1:length(reads.url)
	fprintf(1, 'Checking for contamination in sample %s...\n', ...
		reads.meta.sample_id{s});
	[alignments, unaligned] = align_reads(filter(reads, s), ...
		contaminants.Sequence, 'MaxMismatches', 1, ...
		'ReportAlignments', 1, varargin{:});
	
	total_reads(s) = alignments.total_reads;
	
	for al = iterate_alignments(alignments)
		for t = str2double(al.target)'
			contaminant_reads(t, s) = contaminant_reads(t, s) + 1;
		end
	end
	
	clean.url{s} = unaligned.url{1};
end

stats.contaminant_reads = contaminant_reads;
stats.total_reads = total_reads;



	
	
	
	
	
	
	




function contaminants = human_contaminants()

global organism;

contaminants = struct;
contaminants.Name = {};
contaminants.Sequence = {};

% 40S - small ribosomal subunit in eukaryotes
contaminants.Name{end+1} = 'RN18S1';
contaminants.Sequence{end+1} = ...
'tacctggttgatcctgccagtagcatatgcttgtctcaaagattaagccatgcatgtctgagtacgcacggccggtacagtgaaactgcgaatggctcattaaatcagttatggttcctttggtcgctcgctcctctcctacttggataactgtggtaattctagagctaatacatgccgacgggcgctgacccccttcgcgggggggatgcgtgcatttatcagatcaaaaccaacccggtcagcccctctccggccccggccggggggcgggcgccggcggctttggtgactctagataacctcgggccgatcgcacgccccccgtggcggcgacgacccattcgaacgtctgccctatcaactttcgatggtagtcgccgtgcctaccatggtgaccacgggtgacggggaatcagggttcgattccggagagggagcctgagaaacggctaccacatccaaggaaggcagcaggcgcgcaaattacccactcccgacccggggaggtagtgacgaaaaataacaatacaggactctttcgaggccctgtaattggaatgagtccactttaaatcctttaacgaggatccattggagggcaagtctggtgccagcagccgcggtaattccagctccaatagcgtatattaaagttgctgcagttaaaaagctcgtagttggatcttgggagcgggcgggcggtccgccgcgaggcgagccaccgcccgtccccgccccttgcctctcggcgccccctcgatgctcttagctgagtgtcccgcggggcccgaagcgtttactttgaaaaaattagagtgttcaaagcaggcccgagccgcctggataccgcagctaggaataatggaataggaccgcggttctattttgttggttttcggaactgaggccatgattaagagggacggccgggggcattcgtattgcgccgctagaggtgaaattcttggaccggcgcaagacggaccagagcgaaagcatttgccaagaatgttttcattaatcaagaacgaaagtcggaggttcgaagacgatcagataccgtcgtagttccgaccataaacgatgccgaccggcgatgcggcggcgttattcccatgacccgccgggcagcttccgggaaaccaaagtctttgggttccggggggagtatggttgcaaagctgaaacttaaaggaattgacggaagggcaccaccaggagtggagcctgcggcttaatttgactcaacacgggaaacctcacccggcccggacacggacaggattgacagattgatagctctttctcgattccgtgggtggtggtgcatggccgttcttagttggtggagcgatttgtctggttaattccgataacgaacgagactctggcatgctaactagttacgcgacccccgagcggtcggcgtcccccaacttcttagagggacaagtggcgttcagccacccgagattgagcaataacaggtctgtgatgcccttagatgtccggggctgcacgcgcgctacactgactggctcagcgtgtgcctaccctacgccggcaggcgcgggtaacccgttgaaccccattcgtgatggggatcggggattgcaattattccccatgaacgaggaattcccagtaagtgcgggtcataagcttgcgttgattaagtccctgccctttgtacacaccgcccgtcgctactaccgattggatggtttagtgaggccctcggatcggccccgccggggtcggcccacggccctggcggagcgctgagaagacggtcgaacttgactatctagaggaagtaaaagtcgtaacaaggtttccgtaggtgaacctgcggaaggatcatta';

% 60S - large ribosomal subunit in eukaryotes
contaminants.Name{end+1} = 'RN28S1';
contaminants.Sequence{end+1} = [...
'cgcgacctcagatcagacgtggcgacccgctgaatttaagcatattagtcagcggaggagaagaaactaaccaggattccctcagtaacggcgagtgaacagggaagagcccagcgccgaatccccgccccgcggcggggcgcgggacatgtggcgtacggaagacccgctccccggcgccgctcgtggggggcccaagtccttctgatcgaggcccagcccgtggacggtgtgaggccggtagcggcccccggcgcgccgggcccgggtcttcccggagtcgggttgcttgggaatgcagcccaaagcgggtggtaaactccatctaaggctaaataccggcacgagaccgatagtcaacaagtaccgtaagggaaagttgaaaagaactttgaagagagagttcaagagggcgtgaaaccgttaagaggtaaacgggtggggtccgcgcagtccgcccggaggattcaacccggcggcgggtccggccgtgtcggcggcccggcggatctttcccgccccccgttcctcccgacccctccacccgccctcccttcccccgccgcccctcctcctcctccccggagggggcgggctccggcgggtgcgggggtgggcgggcggg' ...
'gccgggggtggggtcggcgggggaccgtcccccgaccggcgaccggccgccgccgggcgcatttccaccgcggcggtgcgccgcgaccggctccgggacggctgggaaggcccggcggggaaggtggctcggggggccccgtccgtccgtccgtccgtcctcctcctcccccgtctccgccccccggccccgcgtcctccctcgggagggcgcgcgggtcggggcggcggcggcggcggcggtggcggcggcggcggcggcggcgggaccgaaaccccccccgagtgttacagcccccccggcagcagcactcgccgaatcccggggccgagggagcgagacccgtcgccgcgctctcccccctcccggcgcccacccccgcggggaatcccccgcgaggggggtctcccccgcgggggcgcgccggcgtctcctcgtgggggggccgggccacccctcccacggcgcgaccgctctcccacccctcctccccgcgcccccgccccggcgacggggggggtgccgcgcgcgggtcggggggcggggcggactgtccccagtgcgccccgggcgggtcgcgccgtcgggcccgggggaggttctctcggggccacgcgcgcgtcccccgaagagggggacggcggagcgagcgcacggggtcggcggcgacgtcggctacccacccgacccgtcttgaaacacggaccaaggagtctaacacgtgcgcgagtcgggggctcgcacgaaagccgccgtggcgcaatgaaggtgaaggccggcgcgctcgccggccgaggtgggatcccgaggcctctccagtccgccgagggcgcaccaccggcccgtctcgcccgccgcgccggggaggtggagcacgagcgcacgtgttaggacccgaaagatggtgaactatgcctgggcagggcgaagccagaggaaactctggtggaggtccgtagcggtcctgacgtgcaaatcggtcgtccgacctgggtataggggcgaaagactaatcgaaccatctagtagctggttccctccgaagtttccctcaggatagctggcgctctcgcagacccgacgcacccccgccacgcagttttatccggtaaagcgaatgattagaggtcttggggccgaaacgatctcaacctattctcaaactttaaatgggtaagaagcccggctcgctggcgtggagccgggcgtggaatgcgagtgcctagtgggccacttttggtaagcagaactggcgctgcgggatgaaccgaacgccgggttaaggcgcccgatgccgacgctcatcagaccccagaaaaggtgttggttgatatagacagcaggacggtggccatggaagtcggaatccgctaaggagtgtgtaacaactcacctgccgaatcaactagccctgaaaatggatggcgctggagcgtcgggcccatacccggccgtcgccggcagtcgagagtggacgggagcggcgggggcggcgcgcgcgcgcgcgcgtgtggtgtgcgtcggagggcggcggcggcggcggcggcgggggtgtggggtccttcccccgcccccccccccacgcctcctcccctcctcccgcccacgccccgctccccgcccccggagccccgcggacgctacgccgcgacgagtaggagggccgctgcggtgagccttgaagcctagggcgcgggcccgggtggagccgccgcaggtgcagatcttggtggtagtagcaaatattcaaacgagaactttgaaggccgaagtggagaagggttccatgtgaacagcagttgaacatgggtcagtcggtcctgagagatgggcgagcgccgttccgaagggacgggcgatggcctccgttgccctcggccgatcgaaagggagtcgggttcagatccccgaatccggagtggcggagatgggcgccgcgaggcgtccagtgcggtaacgcgaccgatcccggagaagccggcgggagccccggggagagttctcttttctttgtgaagggcagggcgccctggaatgggttcgccccgagagaggggcccgtgccttggaaagcgtcgcggttccggcggcgtccggtgagctctcgctggcccttgaaaatccgggggagagggtgtaaatctcgcgccgggccgtacccatatccgcagcaggtctccaaggtgaacagcctctggcatgttggaacaatgtaggtaagggaagtcggcaagccggatccgtaacttcgggataaggattggctctaagggctgggtcggtcgggctggggcgcgaagcggggctgggcgcgcgccgcggctggacgaggcgccgccgccccccccacgcccggggcacccccctcgcggccctcccccgccccaccccgcgcgcgccgctcgctccctccccgccccgcgccctctctctctctctctcccccgctccccgtcctcccccctccccggggg' ...
'agcgccgcgtgggggcggcggcggggggagaagggtcggggcggcaggggccggcggcggcccgccgcggggccccggcggcgggggcacggtcccccgcgaggggggcccgggcacccggggggccggcggcggcggcgactctggacgcgagccgggcccttcccgtggatcgccccagctgcggcgggcgtcgcggccgcccccggggagcccggcgggcgccggcgcgcccccccccccaccccacgtctcgtcgcgcgcgcgtccgctgggggcggggagcggtcgggcggcggcggtcggcgggcggcggggcggggcggttcgtccccccgccctacccccccggccccgtccgccccccgttcccccctcctcctcggcgcgcggcggcggcggcggcaggcggcggaggggccgcgggccggtcccccccgccgggtccgcccccggggccgcggttccgcgcggcgcctcgcctcggccggcgcctagcagccgacttagaactggtgcggaccaggggaatccgactgtttaattaaaacaaagcatcgcgaaggcccgcggcgggtgttgacgcgatgtgatttctgcccagtgctctgaatgtcaaagtgaagaaattcaatgaagcgcgggtaaacggcgggagtaactatgactctcttaaggtagccaaatgcctcgtcatctaattagtgacgcgcatgaatggatgaacgagattcccactgtccctacctactatccagcgaaaccacagccaagggaacgggcttggcggaatcagcggggaaagaagaccctgttgagcttgactctagtctggcacggtgaagagacatgagaggtgtagaataagtgggaggcccccggcgcccccccggtgtccccgcgaggggcccggggcggggtccgccggccctgcgggccgccggtgaaataccactactctgatcgttttttcactgacccggtgaggcgggggggcgagccccgaggggctctcgcttctggcgccaagcgcccggccgcgcgccggccgggcgcgacccgctccggggacagtgccaggtggggagtttgactggggcggtacacctgtcaaacggtaacgcaggtgtcctaaggcgagctcagggaggacagaaacctcccgtggagcagaagggcaaaagctcgcttgatcttgattttcagtacgaatacagaccgtgaaagcggggcctcacgatccttctgaccttttgggttttaagcaggaggtgtcagaaaagttaccacagggataactggcttgtggcggccaagcgttcatagcgacgtcgctttttgatccttcgatgtcggctcttcctatcattgtgaagcagaattcaccaagcgttggattgttcacccactaatagggaacgtgagctgggtttagaccgtcgtgagacaggttagttttaccctactgatgatgtgttgttgccatggtaatcctgctcagtacgagaggaaccgcaggttcagacatttggtgtatgtgcttggctgaggagccaatggggcgaagctaccatctgtgggattatgactgaacgcctctaagtcagaatcccgcccaggcggaacgatacggcagcgccgcggagcctcggttggcctcggatagccggtcccccgcctgtccccgccggcgggccgccccccccctccacgcgccccgcgcgcgcgggagggcgcgtgccccgccgcgcgccgggaccggggtccggtgcggagtgcccttcgtcctgggaaacggggcgcggccggagaggcggccgccccctcgcccgtcacgcaccgcacgttcgtggggaacctggcgctaaaccattcgtagacgacctgcttctgggtcggggtttcgtacgtagcagagcagctccctcgctgcgatctattgaaagtcagccctcgacacaagggtttgtc'];

contaminants.Name{end+1} = 'RN5-8S1';
contaminants.Sequence{end+1} = ...
'gactcttagcggtggatcactcggctcgtgcgtcgatgaagaacgcagctagctgcgagaattaatgtgaattgcaggacacattgatcatcgacacttcgaacgcacttgcggccccgggttcctcccggggctacgcctgtctgagcgtcgctt';

contaminants.Name{end+1} = 'RN5S';
contaminants.Sequence{end+1} = ...
'gtctacggccataccaccctgaacgcgcccgatctcgtctgatctcggaagctaagcagggtcgggcctggttagtacttggatgggagaccgcctgggaataccgggtgctgtaggcttt';

% Mitochondrial RNA
contaminants.Name{end+1} = 'MT-DNA';
contaminants.Sequence{end+1} = ...
	organism.Chromosomes.Sequence{25}(1:organism.Chromosomes.Length(25));


